In [29]:

    
from pyfasta import Fasta
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord



In [36]:

    
# Open the transcript FASTA through pyfasta's Fasta class; the resulting
# object is used below via .keys() and [] lookups by transcript header.
# The filename suggests human GRCh38 cDNA sequences.
# Requires this file to be in the current working directory
# NOTE(review): pyfasta may create index/flat files next to the FASTA on
# first open, so the directory probably needs to be writable - confirm.
all_trans = Fasta("Homo_sapiens.GRCh38.cdna.all.fa")

Get the first `max_trans` transcripts that are longer than `min_len` bp



In [14]:

    
# Selection settings: a transcript qualifies only when it is strictly longer
# than min_len, and collection stops once max_trans transcripts are gathered.
max_trans = 10
min_len = 500

# Maps each kept key from all_trans to its sequence converted to a plain str.
trans_to_keep = {}
count = 0  # number of qualifying transcripts collected so far
for tname in all_trans.keys():
    if count == max_trans:
        # Quota reached: stop before looking at any further records.
        break
    if len(all_trans[tname]) <= min_len:
        # Not longer than the threshold: skip to the next transcript.
        continue
    trans_to_keep[tname] = str(all_trans[tname])
    count += 1

Now convert them to sequence records and write them to a FASTA file



In [33]:

    
# Turn every kept transcript into a Biopython SeqRecord.
# The record id is the first whitespace-separated token of the original key;
# the description is set to an empty string (presumably so that no extra
# text follows the id in the written header - confirm).
trans_recs = []
for tname, seq_str in trans_to_keep.items():
    trans_recs.append(
        SeqRecord(seq=Seq(seq_str), id=tname.split()[0], description="")
    )



In [34]:

    
# Build the output filename from the selection settings; with max_trans = 10
# and min_len = 500 this yields "10_trans_gt_500_bp.fasta".
# Note the name encodes the requested cap (max_trans), not the number of
# transcripts actually collected, which can be smaller.
out_fname = "{0}_trans_gt_{1}_bp.fasta".format(max_trans, min_len)



In [35]:

    
# Write all collected records to out_fname in FASTA format.
# Mode "w" truncates any existing file of the same name; the with-block
# closes the handle once writing finishes.
with open(out_fname, "w") as out_fhandle:
    SeqIO.write(trans_recs, out_fhandle, "fasta")



In [ ]:

Get the first max_trans transcripts

Now output them

Get the first `max_trans` transcripts